Student Admissions

Authors

Amelia Baier, Andrea Dukic, Mia Mayerhofer

Introduction

Demographic Insights

Geographic

Code
import pandas as pd
import altair as alt
import seaborn as sns
import plotly.express as px
from vega_datasets import data
import matplotlib.pyplot as plt
Code
# Load the dataset from clean_data.csv. The path is relative, so it is
# resolved against the current working directory at run time.
DATA_PATH = '../data/clean_data.csv'
df = pd.read_csv(DATA_PATH)
Code
# Shared colour ramp for the maps in this notebook, ordered from dark blues
# through light blue and muted tones to a dark red.
custom_palette = [
    '#00072D',  # near-black navy
    '#0A2472',  # dark blue
    '#0E6BA8',  # medium blue
    '#A6E1FA',  # light blue
    '#99ABC5',  # grey-blue
    '#8B748F',  # muted purple
    '#6F0624',  # dark red
]
Code
# Per-state mean of each metric column.
metric_cols = ['GPA', 'WorkExp', 'TestScore', 'WritingScore', 'VolunteerLevel']
num_cols = df[['State'] + metric_cols]
avg_df = num_cols.groupby('State').mean().reset_index()

# Full state name -> two-letter abbreviation, as required by Plotly's
# "USA-states" location mode. A state that is not listed here maps to NaN.
state_abbr = {
    'Alabama': 'AL',
    'California': 'CA',
    'Colorado': 'CO',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Mississippi': 'MS',
    'New York': 'NY',
    'Oregon': 'OR',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
}

# Swap the full state name for its abbreviation. The abbreviation is added as
# a new trailing column before the original is dropped, so 'State' ends up as
# the LAST column of avg_df (the metric columns come first).
avg_df = (
    avg_df
    .assign(State_Abbr=avg_df['State'].map(state_abbr))
    .drop(columns=['State'])
    .rename(columns={'State_Abbr': 'State'})
)
Code
import plotly.io as pio

# Register two renderers so the Plotly figure shows up in both places:
#   plotly_mimetype -> VS Code notebook UI
#   notebook        -> "Jupyter: Export to HTML" command in VS Code
# See https://plotly.com/python/renderers/#multiple-renderers
pio.renderers.default = "plotly_mimetype+notebook"

# Initial view: states coloured by average GPA. The colour label is the
# generic "Selected Variable" because the dropdown below swaps the data.
fig = px.choropleth(
    avg_df,
    locations=avg_df['State'],
    locationmode="USA-states",
    scope="usa",
    color=avg_df['GPA'],
    color_continuous_scale=custom_palette,
    hover_data={"State": True, "GPA": True},
    labels={"GPA": "Selected Variable"},
)

# One dropdown entry per column except the last one; selecting an entry
# replaces the trace's z-values with that column.
dropdown = [
    {'label': col, 'method': 'update', 'args': [{'z': [avg_df[col]]}]}
    for col in avg_df.columns[:-1]
]

variable_menu = {'buttons': dropdown, 'direction': 'down', 'showactive': True}
fig.update_layout(
    updatemenus=[variable_menu],
    title='Choropleth Map of Average Selected Variable',
)
fig.update_coloraxes(colorbar_title=dict(text='Selected Variable'))

fig.show()
Code
# Build a table with one row per (Decision, State) pair.
# StateCount is the number of non-null GPA values in that pair: count()
# ignores missing values, so rows without a GPA are not counted.
decision_state = df.groupby(['Decision', 'State'])[["GPA"]].count().reset_index()
decision_state = decision_state.rename(columns={'GPA': 'StateCount'})

# DecisionCount is the total across all states for the row's decision, so
# Rate is the percentage of that decision's applicants who come from the
# state. It is NOT the share of a state's applicants receiving the decision.
decision_state['DecisionCount'] = decision_state.groupby('Decision')['StateCount'].transform('sum')
decision_state['Rate'] = decision_state['StateCount'] / decision_state['DecisionCount'] * 100

# Map state names to the numeric ids in the vega_datasets table, which the
# maps use to join against the topojson "id" field. Load the dataset once
# instead of once per column. States absent from the table map to NaN.
state_lookup = data.population_engineers_hurricanes()
state_id_dict = dict(zip(state_lookup["state"], state_lookup["id"]))
decision_state["StateID"] = decision_state["State"].map(state_id_dict)

# Split by decision for the side-by-side maps.
admit_states = decision_state[decision_state['Decision'] == "Admit"]
decline_states = decision_state[decision_state['Decision'] == "Decline"]
Code
states = alt.topo_feature('https://raw.githubusercontent.com/vega/vega-datasets/master/data/us-10m.json', 'states')
# NOTE(review): selection_multi / add_selection are deprecated in Altair 5 in
# favour of selection_point / add_params. Confirm the installed version
# before migrating; the calls are kept as-is here.
click = alt.selection_multi(fields = ["State"])


def _rate_map(rates, title):
    """Build a two-layer US map coloured by the ``Rate`` column of ``rates``.

    The admit and decline maps were previously two copy-pasted pipelines that
    differed only in their source frame and title; this helper is that
    pipeline, with the same call order.

    Parameters
    ----------
    rates : pandas.DataFrame
        Rows to join onto the state shapes. Must contain ``StateID`` (matched
        against the topojson ``id``), ``State`` and ``Rate``.
    title : str
        Chart title.

    Returns
    -------
    alt.LayerChart
        States with a rate drawn in colour, layered with a faint grey fill
        for states that have none.
    """
    # Layer 1: states coloured by Rate; states with no joined Rate are faded.
    existing_states = alt.Chart(states).mark_geoshape(stroke='black').encode(
        color=alt.Color("Rate:Q", scale=alt.Scale(range=custom_palette)),
        tooltip=["State:N", "Rate:Q"],
        opacity=alt.condition('isValid(datum.Rate)', alt.value(1), alt.value(0.2)),
    ).transform_lookup(
        lookup="id",
        from_=alt.LookupData(rates, "StateID", list(rates.columns)),
    ).properties(
        width=333, height=200, title=title
    ).add_selection(click).project(type="albersUsa").interactive()

    # Layer 2: grey fill, shown only where Rate is missing. This layer has no
    # lookup of its own; Rate comes from the lookup applied to the layered
    # chart below.
    missing_states = (
        alt.Chart(states)
        .mark_geoshape(fill="grey", stroke="white")
        .encode(opacity=alt.condition("isValid(datum.Rate)", alt.value(0), alt.value(0.2)))
        .add_selection(click)
        .project(type="albersUsa")
    )

    # Apply tooltip and lookup at the layer level so both layers see the
    # joined columns.
    layered = existing_states + missing_states
    return layered.encode(
        tooltip=["State:N", "Rate:Q"]
    ).transform_lookup(
        lookup="id",
        from_=alt.LookupData(rates, "StateID", list(rates.columns)),
    ).interactive()


admit_map = _rate_map(admit_states, "Admission Rates by State")
decline_map = _rate_map(decline_states, "Rejection Rates by State")

admit_map | decline_map

Gender

Machine Learning

Code
%%html
<!-- Embeds the static image decision_pairplot.png from the website's images directory. -->
<img src="../website/images/decision_pairplot.png">

Conclusions